This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

# Read the bogota3 and bogota0 CSV exports and echo each data frame.
df <- read.csv(
  file = "/Users/jialincheoh/analysis-2017/spring-2017/script/analysis/overall_phase5_bogota3.csv",
  header = TRUE,
  sep = ","
)
df
df0 <- read.csv(
  file = "/Users/jialincheoh/analysis-2017/spring-2017/script/analysis/overall_phase5_bogota0.csv",
  header = TRUE,
  sep = ","
)
df0
# Added-variable plots for the two-predictor fit (avPlots is provided by car).
library(car)
avPlots(
  lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df)
)

# Single-predictor OLS fit; `model` is reused by the diagnostics further down.
model <- lm(formula = len_unique ~ add.loc.15, data = df)
summary(model)

Call:
lm(formula = len_unique ~ add.loc.15, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
 -4872  -4381  -2993  -1457  67216 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  4872.22    2851.70   1.709   0.0955 .
add.loc.15    -54.23      69.31  -0.782   0.4387  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 12730 on 39 degrees of freedom
Multiple R-squared:  0.01546,   Adjusted R-squared:  -0.009789 
F-statistic: 0.6122 on 1 and 39 DF,  p-value: 0.4387
# Diagnostic plots for the OLS fit.
plot(x = model)

# Robust regression of the same formula via MASS::rlm with the bisquare psi.
library(MASS)
r <- rlm(
  len_unique ~ add.loc.15,
  data = df,
  psi = psi.bisquare
)
summary(r)

Call: rlm(formula = len_unique ~ add.loc.15, data = df, psi = psi.bisquare)
Residuals:
     Min       1Q   Median       3Q      Max 
  -78.43   -18.90    14.69   212.65 71834.50 

Coefficients:
            Value   Std. Error t value
(Intercept) 33.2673 12.8966     2.5795
add.loc.15   0.8092  0.3135     2.5814

Residual standard error: 50.49 on 39 degrees of freedom
# Normal-approximation confidence intervals for the robust fit's coefficients.
confint.default(object = r)
               2.5 %   97.5 %
(Intercept) 7.990445 58.54406
add.loc.15  0.194786  1.42353
# Weighted least squares refit of len_unique on add.loc.15.
# Step 1: ordinary fit whose residuals feed the weight calculation.
WLS.mod <- lm(len_unique ~ add.loc.15, data = df)
# Step 2: regress |residuals| on the predictor; the weights are the inverse
# of the squared fitted values of that auxiliary regression.
wts1 <- 1 / fitted(lm(abs(residuals(WLS.mod)) ~ add.loc.15, data = df))^2
# Step 3: refit with those weights. The argument is spelled out as `weights`
# instead of relying on partial matching of `weight`.
WLS.mod2 <- lm(len_unique ~ add.loc.15, data = df, weights = wts1)
summary(WLS.mod2)

Call:
lm(formula = len_unique ~ add.loc.15, data = df, weights = wts1)

Weighted Residuals:
    Min      1Q  Median      3Q     Max 
-0.7300 -0.4972 -0.4910 -0.4543  8.5424 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  4144.94    2330.79   1.778   0.0831 .
add.loc.15    -45.37      26.62  -1.705   0.0962 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.791 on 39 degrees of freedom
Multiple R-squared:  0.06934,   Adjusted R-squared:  0.04547 
F-statistic: 2.906 on 1 and 39 DF,  p-value: 0.09623
# 95% confidence intervals for the weighted fit's coefficients.
confint(object = WLS.mod2, level = 0.95)
                 2.5 %      97.5 %
(Intercept) -569.51797 8859.399118
add.loc.15   -99.21131    8.467861
# Residual plots plus curvature tests (car::residualPlots) for the OLS fit.
# NOTE(review): the output recorded beneath this call lists add.loc.45 and
# novelty.phase5 terms, but `model` is fitted on add.loc.15 only. Re-run to
# confirm the results match the current model.
residualPlots(model = model)
               Test stat Pr(>|Test stat|)
add.loc.45        0.1533           0.8790
novelty.phase5   -0.6820           0.4995
Tukey test       -0.6524           0.5142

# Extract the OLS residuals and test them for normality (Shapiro-Wilk).
lmLength <- resid(model)
shapiro.test(x = lmLength)

    Shapiro-Wilk normality test

data:  lmLength
W = 0.42786, p-value = 1.885e-11
# Normal Q-Q plot of the residuals with its reference line.
qqnorm(y = lmLength)
qqline(y = lmLength)

# Score test for non-constant error variance (car::ncvTest) on the OLS fit.
library(car)
ncvTest(model = model)
Non-constant Variance Score Test 
Variance formula: ~ fitted.values 
Chisquare = 15.92375, Df = 1, p = 6.5946e-05
# Cook's distance plot (plot.lm panel 4) for the two-predictor fit.
plot(
  lm(len_unique ~ add.loc.45 + novelty.phase5, df),
  which = 4,
  pch = 18,
  col = "red"
)

# 20th percentile of the F distribution with 3 and 38 degrees of freedom.
# NOTE(review): presumably a reference cutoff for the Cook's distances plotted
# above - confirm.
qf(p = 0.2, df1 = 3, df2 = 38)
[1] 0.3351188
# Median of the F distribution with 3 and 38 degrees of freedom.
qf(p = 0.5, df1 = 3, df2 = 38)
[1] 0.803003
# Columns compared below, selected by name rather than by position
# (previously positions 6, 66 and 144) so the selection survives column
# reordering in the CSV.
diag_cols <- c("novelty.phase5", "add.loc.15", "len_unique")

# Per-variable box plots.
boxplot(df[diag_cols])

# Pairwise scatterplot matrix.
plot(df[diag_cols])

# Pearson correlation matrix.
cor(df[diag_cols])
               novelty.phase5 add.loc.15 len_unique
novelty.phase5      1.0000000  0.2923436 -0.1709687
add.loc.15          0.2923436  1.0000000 -0.1243208
len_unique         -0.1709687 -0.1243208  1.0000000
# Pairwise correlation-test p-values for every pair of columns of `x`.
#
# x: a data frame or matrix with numeric columns.
#
# Returns a square numeric matrix with one row and one column per column of
# `x`; entry [i, j] is the p-value of cor.test() applied to columns i and j.
# Row and column names are taken from colnames(x).
cor.test.p <- function(x) {
  # Index columns by position so missing or duplicated names still work.
  idx <- seq_len(ncol(x))
  pair_p_value <- function(i, j) cor.test(x[, i], x[, j])[["p.value"]]
  # outer() passes whole index vectors, so the scalar helper is vectorized.
  z <- outer(idx, idx, Vectorize(pair_p_value))
  dimnames(z) <- list(colnames(x), colnames(x))
  # Return the matrix explicitly: a trailing `dimnames<-` assignment would
  # make the function invisibly return the dimnames list instead.
  z
}

# Correlation-test p-values for the same three columns (positions 6, 66, 144).
cor.test.p(x = df[c(6, 66, 144)])

# Influence plot (car::influencePlot) for the two-predictor fit.
library(car)
influencePlot(
  lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df)
)

# Read the bogota3_copy CSV export and echo it.
df1 <- read.csv(
  file = "/Users/jialincheoh/analysis-2017/spring-2017/script/analysis/overall_phase5_bogota3_copy.csv",
  header = TRUE,
  sep = ","
)
df1

# Added-variable plots for the two-predictor fit on df1.
library(car)
avPlots(
  lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df1)
)

# Two-predictor OLS fit on df1; `model1` is reused by the diagnostics below.
model1 <- lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df1)
summary(model1)

Call:
lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df1)

Residuals:
   Min     1Q Median     3Q    Max 
 -2837  -2228  -1048   -577  36934 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)    2836.786   1931.524   1.469    0.150
add.loc.45       -1.617      7.668  -0.211    0.834
novelty.phase5  -23.710     31.919  -0.743    0.462

Residual standard error: 6519 on 37 degrees of freedom
Multiple R-squared:  0.01692,   Adjusted R-squared:  -0.03622 
F-statistic: 0.3184 on 2 and 37 DF,  p-value: 0.7293
# Diagnostic plots for the df1 fit.
plot(x = model1)

# Residuals of the df1 fit (overwrites the earlier lmLength), then a
# Shapiro-Wilk normality test on them.
lmLength <- resid(model1)
shapiro.test(x = lmLength)

    Shapiro-Wilk normality test

data:  lmLength
W = 0.35354, p-value = 4.553e-12
# Normal Q-Q plot of the df1-fit residuals with its reference line.
qqnorm(y = lmLength)
qqline(y = lmLength)

# Score test for non-constant error variance (car::ncvTest) on the df1 fit.
library(car)
ncvTest(model = model1)
Non-constant Variance Score Test 
Variance formula: ~ fitted.values 
Chisquare = 21.96367, Df = 1, p = 2.7786e-06
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KZGY8LXJlYWQuY3N2KCIvVXNlcnMvamlhbGluY2hlb2gvYW5hbHlzaXMtMjAxNy9zcHJpbmctMjAxNy9zY3JpcHQvYW5hbHlzaXMvb3ZlcmFsbF9waGFzZTVfYm9nb3RhMy5jc3YiLCBoZWFkZXIgPVRSVUUsIHNlcD0iLCIpCmRmCmBgYAoKYGBge3J9CmRmMDwtcmVhZC5jc3YoIi9Vc2Vycy9qaWFsaW5jaGVvaC9hbmFseXNpcy0yMDE3L3NwcmluZy0yMDE3L3NjcmlwdC9hbmFseXNpcy9vdmVyYWxsX3BoYXNlNV9ib2dvdGEwLmNzdiIsIGhlYWRlciA9VFJVRSwgc2VwPSIsIikKZGYwCmBgYAoKYGBge3J9CmxpYnJhcnkoY2FyKQphdlBsb3RzKGxtKGxlbl91bmlxdWUgfiBhZGQubG9jLjQ1ICsgbm92ZWx0eS5waGFzZTUsIGRmKSkKYGBgCgpgYGB7cn0KbW9kZWwgPC0gbG0obGVuX3VuaXF1ZSB+IGFkZC5sb2MuMTUsIGRmKQpzdW1tYXJ5KG1vZGVsKQpwbG90KG1vZGVsKQpgYGAKYGBge3J9CmxpYnJhcnkoTUFTUykKciA8LSBybG0obGVuX3VuaXF1ZSB+IGFkZC5sb2MuMTUsIGRhdGE9ZGYsIHBzaT1wc2kuYmlzcXVhcmUpIApzdW1tYXJ5KHIpCmBgYAoKYGBge3J9CmNvbmZpbnQuZGVmYXVsdChyKQpgYGAKCmBgYHtyfQpXTFMubW9kIDwtIGxtKGxlbl91bmlxdWUgfiBhZGQubG9jLjE1LCBkYXRhPWRmKQp3dHMxIDwtIDEvZml0dGVkKGxtKGFicyhyZXNpZHVhbHMoV0xTLm1vZCkpIH4gYWRkLmxvYy4xNSwgZGYpKV4yCldMUy5tb2QyIDwtIGxtKGxlbl91bmlxdWUgfiBhZGQubG9jLjE1LCBkYXRhPWRmLCB3ZWlnaHQ9d3RzMSkKc3VtbWFyeShXTFMubW9kMikKYGBgCgpgYGB7cn0KY29uZmludChXTFMubW9kMikKYGBgCgoKYGBge3J9CnJlc2lkdWFsUGxvdHMobW9kZWwpCmBgYApgYGB7cn0KbG1MZW5ndGggPC0gcmVzaWR1YWxzKG1vZGVsKQpzaGFwaXJvLnRlc3QobG1MZW5ndGgpCnFxbm9ybShsbUxlbmd0aCkKcXFsaW5lKGxtTGVuZ3RoKQpgYGAKYGBge3J9CmxpYnJhcnkoY2FyKQpuY3ZUZXN0KG1vZGVsKQpgYGAKCmBgYHtyfQpwbG90KGxtKGxlbl91bmlxdWUgfiBhZGQubG9jLjQ1ICsgbm92ZWx0eS5waGFzZTUsIGRmKSwgcGNoPTE4LCBjb2w9InJlZCIsIHdoaWNoPWMoNCkpCmBgYAoKYGBge3J9CnFmKDAuMiwgMywgMzgpCmBgYAoKYGBge3J9CnFmKDAuNSwgMywgMzgpCmBgYAoKYGBge3J9CmJveHBsb3QoZGZbYyg2LCA2Niwg
MTQ0KV0pIApwbG90KGRmW2MoNiwgNjYsIDE0NCldKSAKY29yKGRmW2MoNiwgNjYsIDE0NCldKQpjb3IudGVzdC5wIDwtIGZ1bmN0aW9uKHgpewogICAgRlVOIDwtIGZ1bmN0aW9uKHgsIHkpIGNvci50ZXN0KHgsIHkpW1sicC52YWx1ZSJdXSAKICAgIHogPC0gb3V0ZXIoCiAgICBjb2xuYW1lcyh4KSwKICAgIGNvbG5hbWVzKHgpLAogICAgVmVjdG9yaXplKGZ1bmN0aW9uKGksaikgRlVOKHhbLGldLCB4WyxqXSkpCiAgKQpkaW1uYW1lcyh6KSA8LSBsaXN0KGNvbG5hbWVzKHgpLCBjb2xuYW1lcyh4KSkgfQoKY29yLnRlc3QucChkZltjKDYsIDY2LCAxNDQpXSkKYGBgCgpgYGB7cn0KbGlicmFyeShjYXIpCmluZmx1ZW5jZVBsb3QobG0obGVuX3VuaXF1ZSB+IGFkZC5sb2MuNDUgKyBub3ZlbHR5LnBoYXNlNSwgZGYpKQpgYGAKYGBge3J9CmRmMTwtcmVhZC5jc3YoIi9Vc2Vycy9qaWFsaW5jaGVvaC9hbmFseXNpcy0yMDE3L3NwcmluZy0yMDE3L3NjcmlwdC9hbmFseXNpcy9vdmVyYWxsX3BoYXNlNV9ib2dvdGEzX2NvcHkuY3N2IiwgaGVhZGVyID1UUlVFLCBzZXA9IiwiKQpkZjEKYGBgCiAKIApgYGB7cn0KbGlicmFyeShjYXIpCmF2UGxvdHMobG0obGVuX3VuaXF1ZSB+IGFkZC5sb2MuNDUgKyBub3ZlbHR5LnBoYXNlNSwgZGYxKSkKYGBgCmBgYHtyfQptb2RlbDEgPC0gbG0obGVuX3VuaXF1ZSB+IGFkZC5sb2MuNDUgKyBub3ZlbHR5LnBoYXNlNSwgZGYxKQpzdW1tYXJ5KG1vZGVsMSkKcGxvdChtb2RlbDEpCmBgYAogCmBgYHtyfQpsbUxlbmd0aCA8LSByZXNpZHVhbHMobW9kZWwxKQpzaGFwaXJvLnRlc3QobG1MZW5ndGgpCnFxbm9ybShsbUxlbmd0aCkKcXFsaW5lKGxtTGVuZ3RoKQpgYGAKYGBge3J9CmxpYnJhcnkoY2FyKQpuY3ZUZXN0KG1vZGVsMSkKYGBgCgo=